package com.ruoyi.common.utils.file;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
 * Utility methods for extracting the text of a PDF file (via PDFBox) and for saving text to a file.
 *
 * @author chenchuan2
 * @date 2023-05-11 17:04:07
 */
public class PDFUtil {
    /** Output location used by {@link #saveResultToTxt(String)}. */
    private static final String DEFAULT_RESULT_PATH = "/Users/chenchuan/Downloads/chatgpt/result.txt";

    /** PDF read by {@link #main(String[])} when no command-line argument is supplied. */
    private static final String DEFAULT_PDF_PATH = "/Users/chenchuan/Downloads/chatgpt/fujian1.pdf";

    /**
     * Writes the trimmed {@code content} to the default result file, replacing any previous contents.
     *
     * @param content text to save; {@code null} is treated as an empty string
     * @return the path of the default result file (returned even when the write failed)
     */
    public static String saveResultToTxt(String content) {
        return saveResultToTxt(content, DEFAULT_RESULT_PATH);
    }

    /**
     * Writes the trimmed {@code content} to {@code filepath} as UTF-8, replacing any previous contents.
     *
     * <p>This is best-effort: an I/O failure is reported on {@code System.err} and is not propagated.
     *
     * @param content  text to save; {@code null} is treated as an empty string
     * @param filepath destination file path; must not be {@code null}
     * @return {@code filepath}, returned even when the write failed
     * @throws IllegalArgumentException if {@code filepath} is {@code null}
     */
    public static String saveResultToTxt(String content, String filepath) {
        if (filepath == null) {
            throw new IllegalArgumentException("filepath must not be null");
        }
        String text = content == null ? "" : content.trim();
        // FileOutputStream creates the file when it is missing, so no separate createNewFile() is needed.
        // try-with-resources guarantees the stream is closed even when write() throws.
        try (FileOutputStream outStream = new FileOutputStream(new File(filepath))) {
            // Explicit charset so the output does not depend on the platform default encoding.
            outStream.write(text.getBytes(StandardCharsets.UTF_8));
        } catch (IOException e) {
            System.err.println("Failed to write result to " + filepath + ": " + e);
        }
        return filepath;
    }

    /**
     * Extracts the text of every page of the PDF at {@code path}.
     *
     * <p>This is best-effort: if the file cannot be loaded or parsed, the failure is reported on
     * {@code System.err} and an empty string is returned.
     *
     * @param path path of the PDF file to read; must not be {@code null}
     * @return the extracted text, or an empty string when extraction failed
     */
    public static String getPDFContent(String path) {
        File pdfFile = new File(path);
        String content = "";
        // try-with-resources closes the document even when text extraction throws.
        try (PDDocument document = PDDocument.load(pdfFile)) {
            // Number of pages in the document.
            int pages = document.getNumberOfPages();
            // Text extractor.
            PDFTextStripper stripper = new PDFTextStripper();
            // Emit text sorted by its position on the page.
            stripper.setSortByPosition(true);
            stripper.setStartPage(1);
            stripper.setEndPage(pages);
            content = stripper.getText(document);
        } catch (Exception e) {
            // Catch-all kept from the original so any load/parse failure still yields "".
            System.err.println("Failed to read PDF " + path + ": " + e);
        }
        return content;
    }

    /**
     * Prints the text of a PDF to standard output.
     *
     * @param args optional; {@code args[0]} is the PDF path, otherwise the default sample path is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        System.out.println(getPDFContent(path));
    }
}
